# Start from an empty global environment so objects left over from an earlier
# run cannot leak into this one.
# NOTE(review): this also wipes the caller's workspace when the file is
# sourced; running the script in a fresh R session would make it unnecessary.
remove(list = ls())

# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later at the first use.
library(foreign)    # read.dta()
library(tidyverse)  # filter(), rename(), bind_rows()
library(gridExtra)
# ---- Load raw inputs ----

# Stata extract; later steps read its fips, state_fips, state and city_upper
# columns.
jeremy <- read.dta(
  "~/Dropbox (Personal)/RefugeeBan/Data/CityMapping/tomerge_strict.dta"
)

# Census FIPS table, read twice from the same file: `census` is transformed
# further down, `censusTest` is left as read for side-by-side checks.
census <- read.csv(
  "~/Dropbox (Personal)/RefugeeBan/Data/CityMapping/fips_codes_2010census.csv"
)
censusTest <- read.csv(
  "~/Dropbox (Personal)/RefugeeBan/Data/CityMapping/fips_codes_2010census.csv"
)

# Two WRAPS arrival extracts (presumably 2000-2010 and 2010-2018, going by the
# object names — confirm against the files).
wraps20002010 <- read.csv(
  "~/Dropbox (Personal)/RefugeeBan/Data/WRAPSData/MX %2D Arrivals by Destination and Nationality-3-noHeader.csv"
)
wraps20102018 <- read.csv(
  "~/Dropbox (Personal)/RefugeeBan/Data/WRAPSData/MX %2D Arrivals by Destination and Nationality-2-noHeader.csv"
)

# Lookup table from full state name to state abbreviation.
abbreviations <- read.csv(
  "~/Dropbox (Personal)/RefugeeBan/Data/CityMapping/states-abbreviations.csv"
)

# Stack the two WRAPS extracts into a single table.
wraps <- wraps20002010 %>%
  bind_rows(wraps20102018)
# Quick look at the inputs (the same class check is issued twice).
class(wraps20002010)
class(wraps20002010)
jeremy$fips %>%
  unique() %>%
  sort() %>%
  head()
# Disabled experiment: numeric conversion of the FIPS column.
#jeremy$fips <- as.numeric(jeremy$fips)
#head(sort(unique(jeremy$fips)))

# Drop Puerto Rico (state FIPS "72") and compare row counts before and after.
jeremy_noPR <- jeremy %>%
  filter(state_fips != "72")
dim(jeremy)
dim(jeremy_noPR)
head(wraps)

# Harmonise column names across the tables so that they share `city`,
# `state` and `State.Abbreviation`.
wraps <- wraps %>%
  rename(city = Assur_DestinationCity1, state = nat_definition4)
abbreviations <- abbreviations %>%
  rename(state = State, State.Abbreviation = Abbreviation)
census <- census %>%
  rename(city = GU.Name)
jeremy_noPR <- jeremy_noPR %>%
  rename(State.Abbreviation = state)
head(jeremy_noPR)
# Build the five-character county FIPS key: a two-digit state code followed by
# a three-digit county code, both left-padded with zeros.
#
# sprintf("%0Nd") pads to the full width in one vectorised call. Prepending a
# single "0" is not enough for county codes: a one-digit code such as 1 would
# become "01" instead of "001" and give a four-character key. The vectorised
# form also works on a zero-row table, where a 1:nrow loop would not.
# Assumes both code columns hold whole numbers — TODO confirm against the CSV.
census$State.FIPS.Code2 <- sprintf("%02d", as.integer(census$State.FIPS.Code))
census$County.FIPS.Code2 <- sprintf("%03d", as.integer(census$County.FIPS.Code))

# Combine the padded state and county codes into a single `fips` column.
census$fips <- paste0(census$State.FIPS.Code2, census$County.FIPS.Code2)
head(census[c("State.FIPS.Code", "County.FIPS.Code", "fips")])
# Number of distinct destination states before filtering (issued twice).
length(unique(wraps$state))
length(unique(wraps$state))

# Drop Puerto Rico, Guam and rows with a blank state from WRAPS.
wraps_noPR <- wraps %>%
  filter(state != "Puerto Rico", state != "Guam", state != "")
length(unique(wraps_noPR$state))

# Attach the state abbreviation to every WRAPS row by full state name.
wraps_w_states <- merge(x = wraps_noPR, y = abbreviations, by = "state")

# Lower-case every city and state field so the tables can be matched
# case-insensitively.
wraps_w_states[c("state", "State.Abbreviation", "city")] <-
  lapply(wraps_w_states[c("state", "State.Abbreviation", "city")], tolower)
jeremy_noPR[["State.Abbreviation"]] <- tolower(jeremy_noPR[["State.Abbreviation"]])
jeremy_noPR[["city"]] <- tolower(jeremy_noPR[["city_upper"]])

# Row counts before and after the abbreviation merge, and the number of
# distinct abbreviations that survived it.
dim(wraps_noPR)
dim(wraps_w_states)
length(unique(wraps_w_states$State.Abbreviation))

census[c("State.Abbreviation", "city")] <-
  lapply(census[c("State.Abbreviation", "city")], tolower)

# Missing-value counts in the columns used as keys.
sum(is.na(wraps_w_states$city))
sum(is.na(wraps_w_states$State.Abbreviation))
sum(is.na(census$fips))
names(jeremy_noPR)

# Combined "city_state" key in each of the three tables.
jeremy_noPR$cityST <- paste(
  jeremy_noPR[["city"]], jeremy_noPR[["State.Abbreviation"]],
  sep = "_"
)
census$cityST <- paste(
  census[["city"]], census[["State.Abbreviation"]],
  sep = "_"
)
wraps_w_states$cityST <- paste(
  wraps_w_states[["city"]], wraps_w_states[["State.Abbreviation"]],
  sep = "_"
)
# Spot-check duplicated city/state keys in the census table.
# NOTE(review): 44 looks like a hand-picked duplicate count from an earlier
# inspection — confirm before relying on it.
which(table(census$cityST) == 44)
census[which(census$cityST == "washington_oh"), ]
censusTest[which(censusTest$State.Abbreviation == "OH" &
                   censusTest$GU.Name == "Washington"), ]

# Merge city and state name with FIPS code.
# all.x = FALSE keeps only WRAPS rows that match at least one census row; it
# is spelled out because `F` is an ordinary variable that can be reassigned.
# A city/state that maps to several census rows yields one row per match.
wraps2 <- merge(
  wraps_w_states,
  census[, c(
    "city",
    "State.Abbreviation",
    "ANSI.Code",
    "Entity.Description",
    "fips"
  )],
  by = c("city", "State.Abbreviation"),
  all.x = FALSE
)

# Are there any city/state names in the WRAPS file that could not be matched
# to at least one FIPS code? Compare the number of distinct city/state pairs
# before and after the merge.
# nrow() is required here: length() of a data frame is its number of columns,
# so length(unique(<two-column data frame>)) is always 2.
nrow(unique(wraps_w_states[, c("city", "State.Abbreviation")]))
nrow(unique(wraps2[, c("city", "State.Abbreviation")]))
length(unique(wraps2[, "cityST"]))
# Reset the workspace for a second pass over the same pipeline.
# A read of tomerge_strict.dta used to precede this reset; it was dropped
# because its result was deleted by the reset itself.
# NOTE(review): this also wipes the caller's workspace when the file is
# sourced; running the script in a fresh R session would make it unnecessary.
remove(list = ls())

# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later at the first use.
library(foreign)    # read.dta()
library(tidyverse)  # filter(), rename(), bind_rows()
library(gridExtra)
# ---- Load raw inputs ----
# The project folder lives under "~/Dropbox" on some machines and under
# "~/Dropbox (Personal)" on others. Reading every file from both roots in turn
# aborts the script wherever only one of them exists, so the root is resolved
# once here. "~/Dropbox" is tried first because its copies are the ones that
# ended up in memory when both roots were read.
data_roots <- file.path(
  c("~/Dropbox", "~/Dropbox (Personal)"),
  "RefugeeBan", "Data"
)
data_dir <- data_roots[dir.exists(data_roots)][1]
if (is.na(data_dir)) {
  stop(
    "RefugeeBan/Data not found under ~/Dropbox or ~/Dropbox (Personal)",
    call. = FALSE
  )
}

# Stata extract; later steps read its fips, state_fips, state and city_upper
# columns.
jeremy <- read.dta(file.path(data_dir, "CityMapping", "tomerge_strict.dta"))

# Census FIPS table, read twice from the same file: `census` is transformed
# further down, `censusTest` is left as read for side-by-side checks.
census <- read.csv(
  file.path(data_dir, "CityMapping", "fips_codes_2010census.csv")
)
censusTest <- read.csv(
  file.path(data_dir, "CityMapping", "fips_codes_2010census.csv")
)

# Two WRAPS arrival extracts (presumably 2000-2010 and 2010-2018, going by the
# object names — confirm against the files).
wraps20002010 <- read.csv(file.path(
  data_dir, "WRAPSData",
  "MX %2D Arrivals by Destination and Nationality-3-noHeader.csv"
))
wraps20102018 <- read.csv(file.path(
  data_dir, "WRAPSData",
  "MX %2D Arrivals by Destination and Nationality-2-noHeader.csv"
))

# Lookup table from full state name to state abbreviation.
abbreviations <- read.csv(
  file.path(data_dir, "CityMapping", "states-abbreviations.csv")
)

# Stack the two WRAPS extracts into a single table.
wraps <- bind_rows(wraps20002010, wraps20102018)
# Quick look at the inputs (the same class check is issued twice).
class(wraps20002010)
class(wraps20002010)
jeremy$fips %>%
  unique() %>%
  sort() %>%
  head()
# Disabled experiment: numeric conversion of the FIPS column.
#jeremy$fips <- as.numeric(jeremy$fips)
#head(sort(unique(jeremy$fips)))

# Drop Puerto Rico (state FIPS "72") and compare row counts before and after.
jeremy_noPR <- jeremy %>%
  filter(state_fips != "72")
dim(jeremy)
dim(jeremy_noPR)
head(wraps)

# Harmonise column names across the tables so that they share `city`,
# `state` and `State.Abbreviation`.
wraps <- wraps %>%
  rename(city = Assur_DestinationCity1, state = nat_definition4)
abbreviations <- abbreviations %>%
  rename(state = State, State.Abbreviation = Abbreviation)
census <- census %>%
  rename(city = GU.Name)
jeremy_noPR <- jeremy_noPR %>%
  rename(State.Abbreviation = state)
head(jeremy_noPR)
# Build the five-character county FIPS key: a two-digit state code followed by
# a three-digit county code, both left-padded with zeros.
#
# sprintf("%0Nd") pads to the full width in one vectorised call. Prepending a
# single "0" is not enough for county codes: a one-digit code such as 1 would
# become "01" instead of "001" and give a four-character key. The vectorised
# form also works on a zero-row table, where a 1:nrow loop would not.
# Assumes both code columns hold whole numbers — TODO confirm against the CSV.
census$State.FIPS.Code2 <- sprintf("%02d", as.integer(census$State.FIPS.Code))
census$County.FIPS.Code2 <- sprintf("%03d", as.integer(census$County.FIPS.Code))

# Combine the padded state and county codes into a single `fips` column.
census$fips <- paste0(census$State.FIPS.Code2, census$County.FIPS.Code2)
head(census[c("State.FIPS.Code", "County.FIPS.Code", "fips")])
# Number of distinct destination states before filtering (issued twice).
length(unique(wraps$state))
length(unique(wraps$state))

# Drop Puerto Rico, Guam and rows with a blank state from WRAPS.
wraps_noPR <- wraps %>%
  filter(state != "Puerto Rico", state != "Guam", state != "")
length(unique(wraps_noPR$state))

# Attach the state abbreviation to every WRAPS row by full state name.
wraps_w_states <- merge(x = wraps_noPR, y = abbreviations, by = "state")

# Lower-case every city and state field so the tables can be matched
# case-insensitively.
wraps_w_states[c("state", "State.Abbreviation", "city")] <-
  lapply(wraps_w_states[c("state", "State.Abbreviation", "city")], tolower)
jeremy_noPR[["State.Abbreviation"]] <- tolower(jeremy_noPR[["State.Abbreviation"]])
jeremy_noPR[["city"]] <- tolower(jeremy_noPR[["city_upper"]])

# Row counts before and after the abbreviation merge, and the number of
# distinct abbreviations that survived it.
dim(wraps_noPR)
dim(wraps_w_states)
length(unique(wraps_w_states$State.Abbreviation))

census[c("State.Abbreviation", "city")] <-
  lapply(census[c("State.Abbreviation", "city")], tolower)

# Missing-value counts in the columns used as keys.
sum(is.na(wraps_w_states$city))
sum(is.na(wraps_w_states$State.Abbreviation))
sum(is.na(census$fips))
names(jeremy_noPR)

# Combined "city_state" key in each of the three tables.
jeremy_noPR$cityST <- paste(
  jeremy_noPR[["city"]], jeremy_noPR[["State.Abbreviation"]],
  sep = "_"
)
census$cityST <- paste(
  census[["city"]], census[["State.Abbreviation"]],
  sep = "_"
)
wraps_w_states$cityST <- paste(
  wraps_w_states[["city"]], wraps_w_states[["State.Abbreviation"]],
  sep = "_"
)
# Spot-check duplicated city/state keys in the census table.
# NOTE(review): 44 looks like a hand-picked duplicate count from an earlier
# inspection — confirm before relying on it.
which(table(census$cityST) == 44)
census[which(census$cityST == "washington_oh"), ]
censusTest[which(censusTest$State.Abbreviation == "OH" &
                   censusTest$GU.Name == "Washington"), ]

# Merge city and state name with FIPS code.
# all.x = FALSE keeps only WRAPS rows that match at least one census row; it
# is spelled out because `F` is an ordinary variable that can be reassigned.
# A city/state that maps to several census rows yields one row per match.
wraps2 <- merge(
  wraps_w_states,
  census[, c(
    "city",
    "State.Abbreviation",
    "ANSI.Code",
    "Entity.Description",
    "fips"
  )],
  by = c("city", "State.Abbreviation"),
  all.x = FALSE
)

# Are there any city/state names in the WRAPS file that could not be matched
# to at least one FIPS code? Compare the number of distinct city/state pairs
# before and after the merge.
# nrow() is required here: length() of a data frame is its number of columns,
# so length(unique(<two-column data frame>)) is always 2.
nrow(unique(wraps_w_states[, c("city", "State.Abbreviation")]))
nrow(unique(wraps2[, c("city", "State.Abbreviation")]))
length(unique(wraps2[, "cityST"]))

# Full and de-duplicated listings of the WRAPS city/state pairs.
wraps_w_states[, c("city", "State.Abbreviation")]
unique(wraps_w_states[, c("city", "State.Abbreviation")])

# Distinct pair counts (rows x columns) before and after the merge.
dim(unique(wraps_w_states[, c("city", "State.Abbreviation")]))
dim(unique(wraps2[, c("city", "State.Abbreviation")]))

# Distinct combined keys after and before the merge. A lookup of a column
# named "wraps_w_states" was removed from between these two lines: this script
# never creates such a column, so the lookup stopped with "undefined columns
# selected"; the cityST lookup below is its corrected form.
length(unique(wraps2[, "cityST"]))
length(unique(wraps_w_states[, "cityST"]))
unique(wraps_w_states[, c("city", "State.Abbreviation")])
